rm(list=ls())
library(here)
## Warning: package 'here' was built under R version 4.2.2
## here() starts at C:/Users/pprasa6/Documents/GitHub/globalmix-mozambique
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.2.2
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.2.3
library(plotly)
## Warning: package 'plotly' was built under R version 4.2.3
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
participants <- readRDS(paste0(here(),"/data/clean/participant_data_aim1.RDS"))
contacts <- readRDS(paste0(here(),"/data/clean/contact_data_aim1.RDS"))
## Subset contacts to the IDs in participant list only
contacts <- contacts %>%
dplyr::filter(rec_id %in% unlist(participants$rec_id))
# Filter contact relationship == self (you cannot have a contact with yourself)
contacts <- contacts %>%
dplyr::filter(hh_member_relationship != "Self")
# relationship
contacts$hh_membership <- factor(contacts$hh_membership,
levels = c("Member", "Non-member"))
# recategorize contact locations
contacts <- contacts %>%
mutate(cnt_home = ifelse(location_contact___0==1, 1,0),
cnt_school = ifelse(location_contact___2==1, 1,0),
cnt_work = ifelse(location_contact___3==1, 1,0),
cnt_otherplace = ifelse(location_contact___1==1 |
location_contact___4==1 |
location_contact___5==1 |
location_contact___6==1 |
location_contact___7==1 |
location_contact___8==1 |
location_contact___9==1 |
location_contact___10==1 |
location_contact___11==1,1,0))
# masking
contacts <- contacts %>%
mutate(contact_mask2 = case_when(contact_mask == "Yes, for the entire encounter" ~ "Yes",
contact_mask == "Yes, during parts of encounter" ~ "Yes",
contact_mask == "No mask was worn during the encounter" ~ "No",
TRUE ~ "Can't recall"))
contacts$contact_mask2 <- factor(contacts$contact_mask2, levels = c("Yes", "No",
"Can't recall"))
contacts$cnt_home <- factor(contacts$cnt_home, levels = c(1, 0),
labels = c("Yes", "No"))
contacts$cnt_work <- factor(contacts$cnt_work, levels = c(1, 0),
labels = c("Yes", "No"))
contacts$cnt_school <- factor(contacts$cnt_school, levels = c(1, 0),
labels = c("Yes", "No"))
contacts$cnt_otherplace <- factor(contacts$cnt_otherplace, levels = c(1, 0),
labels = c("Yes", "No"))
df_contact <- contacts %>%
dplyr::select(-study_site) %>%
left_join(participants, by=("rec_id"))
df_contact_d1 <- df_contact %>%
dplyr::filter(study_day==1)
day1_num_contacts <- df_contact_d1 %>%
dplyr::group_by(rec_id, study_site) %>%
dplyr::summarize(num_contacts = n()) %>%
dplyr::select(rec_id, num_contacts)
## `summarise()` has grouped output by 'rec_id'. You can override using the
## `.groups` argument.
df_contact_d1 <- left_join(df_contact_d1, day1_num_contacts, by="rec_id" )
df_contact_d2 <- df_contact %>%
dplyr::filter(study_day==2)
day2_num_contacts <- df_contact_d2 %>%
dplyr::group_by(rec_id, study_site) %>%
dplyr::summarize(num_contacts = n()) %>%
dplyr::select(rec_id, num_contacts)
## `summarise()` has grouped output by 'rec_id'. You can override using the
## `.groups` argument.
df_contact_d2 <- left_join(df_contact_d2, day2_num_contacts, by="rec_id" )
both_num_contacts <- df_contact %>%
dplyr::group_by(rec_id, study_day, study_site) %>%
dplyr::summarize(num_contacts = n()) %>%
dplyr::select(rec_id, study_day, num_contacts)
## `summarise()` has grouped output by 'rec_id', 'study_day'. You can override
## using the `.groups` argument.
df_contact <- left_join(df_contact, both_num_contacts, by=c("rec_id", "study_day") )
# rural contacts
df_contact_rural <- df_contact %>%
dplyr::filter(study_site == "Rural")
## specify participant characteristic stratification for cross tabs/analysis
variables <- data.frame(var=c("participant_sex","participant_age", "mask_uselb",
"hh_membership","occupation", "weekday", "enrolled_school",
"touch_contact", "where_contact", "cnt_home", "cnt_school",
"cnt_work", "cnt_otherplace", "frequency_contact",
"known_contact", "contact_mask2", "ari_symptom", "age_symptom"),
# "hh_member_relationship",
name=c("Sex", "Age", "Do you wear a mask?",
"Household membership", "Occupation", "Weekday/Weekend",
"Enrolled in school", "Did you touch?", "Contact location",
"Contact at home", "Contact at work",
"Contact at school", "Contact in other locations",
"Frequency of contact", "Do you know the contact?",
"Contact wearing mask", "ARI symptoms", "AGE symptoms")) %>%
# "Relationship to household member",
mutate(var = as.character(var),
name = as.character(name))
list <- list(0)
for (i in 1:nrow(variables)){
x <- df_contact_rural[,variables$var[[i]]]
# include another variable n as the number of participants per strata
variables$var[[i]]
# Number and proportion of contacts in each strata
t0 <- as.data.frame(cbind(table(x), # Total
round(prop.table(table(x))*100, digits=0) # Proportion
)) # number of participants
colnames(t0)[1:2] <- c("Total","Col")
Tot <- rep("",5)
# Median contacts for day 1
t1 <- df_contact_d1 %>%
dplyr::filter(study_site == "Rural") %>%
# since this is total contacts per person, keep only 1 distinct record
distinct(rec_id, .keep_all = TRUE) %>%
dplyr::group_by(.dots = variables$var[[i]]) %>%
# na.omit() %>%
do(data.frame(t(quantile(.$num_contacts, na.rm=TRUE, probs=c(0.25,0.5,0.75))))) # Median and IQR
t1$med_contact <- as.character(paste(t1$X50.," (",t1$X25.,"-",t1$X75.,")",sep="")) # Formatting for export
# Median contacts for day 2
t2 <- df_contact_d2 %>%
dplyr::filter(study_site == "Rural") %>%
# since this is total contacts per person, keep only 1 distinct record
distinct(rec_id, .keep_all = TRUE) %>%
dplyr::group_by(.dots = variables$var[[i]]) %>%
# na.omit() %>%
do(data.frame(t(quantile(.$num_contacts, na.rm=TRUE, probs=c(0.25,0.5,0.75))))) # Median and IQR
t2$med_contact <- as.character(paste(t2$X50.," (",t2$X25.,"-",t2$X75.,")",sep="")) # Formatting for export
# Bind columns together and select relevant columns
t0<-qpcR:::cbind.na(t0, t1$med_contact, t2$med_contact)
# convert to numerical and round off?
t0<- t0[,c(1,2,3,4)]
# rename total column
t0$Total <- paste(t0$Total," (","", t0$Col,")",sep="")
t0 <- t0[,c(1,3,4)]
t0[,1]<-as.character(t0[,1])
t0[,2]<-as.character(t0[,2])
t0<-rbind(Tot, t0)
rownames(t0)[1]<-variables$name[[i]]
list[[i]] <- t0
}
## Warning: The `.dots` argument of `group_by()` is deprecated as of dplyr 1.0.0.
## ℹ The deprecated feature was likely used in the dplyr package.
## Please report the issue at <]8;;https://github.com/tidyverse/dplyr/issueshttps://github.com/tidyverse/dplyr/issues]8;;>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning in rbind(deparse.level, ...): number of columns of result, 3, is not a
## multiple of vector length 5 of arg 1
## Warning in rbind(deparse.level, ...): number of columns of result, 3, is not a
## multiple of vector length 5 of arg 1
## Warning in rbind(deparse.level, ...): number of columns of result, 3, is not a
## multiple of vector length 5 of arg 1
## Warning in rbind(deparse.level, ...): number of columns of result, 3, is not a
## multiple of vector length 5 of arg 1
## Warning in rbind(deparse.level, ...): number of columns of result, 3, is not a
## multiple of vector length 5 of arg 1
## Warning in rbind(deparse.level, ...): number of columns of result, 3, is not a
## multiple of vector length 5 of arg 1
## Warning in rbind(deparse.level, ...): number of columns of result, 3, is not a
## multiple of vector length 5 of arg 1
## Warning in rbind(deparse.level, ...): number of columns of result, 3, is not a
## multiple of vector length 5 of arg 1
## Warning in rbind(deparse.level, ...): number of columns of result, 3, is not a
## multiple of vector length 5 of arg 1
## Warning in rbind(deparse.level, ...): number of columns of result, 3, is not a
## multiple of vector length 5 of arg 1
## Warning in rbind(deparse.level, ...): number of columns of result, 3, is not a
## multiple of vector length 5 of arg 1
## Warning in rbind(deparse.level, ...): number of columns of result, 3, is not a
## multiple of vector length 5 of arg 1
## Warning in rbind(deparse.level, ...): number of columns of result, 3, is not a
## multiple of vector length 5 of arg 1
## Warning in rbind(deparse.level, ...): number of columns of result, 3, is not a
## multiple of vector length 5 of arg 1
## Warning in rbind(deparse.level, ...): number of columns of result, 3, is not a
## multiple of vector length 5 of arg 1
## Warning in rbind(deparse.level, ...): number of columns of result, 3, is not a
## multiple of vector length 5 of arg 1
## Warning in rbind(deparse.level, ...): number of columns of result, 3, is not a
## multiple of vector length 5 of arg 1
## Warning in rbind(deparse.level, ...): number of columns of result, 3, is not a
## multiple of vector length 5 of arg 1
# format table
res_restruct<- function(res){
res1 <- lapply(res, as.data.frame)
res1 <- do.call(rbind, res1)
return(res1)
}
table3 <- res_restruct(list)
colnames(table3) <- c("Total (%)", "Day 1", "Day 2")
table3 <- knitr::kable(table3, digits = 0, align = "r") %>%
kableExtra::kable_styling(bootstrap_options = c("striped", "hover", "condensed"))
table3
|
|
Total (%)
|
Day 1
|
Day 2
|
|
Sex
|
|
|
|
|
Female
|
5673 (48)
|
8 (4-11)
|
7 (4-11)
|
|
Male
|
6109 (52)
|
8 (5-12)
|
8 (5-11)
|
|
Age
|
|
|
|
|
<6mo
|
453 (4)
|
3 (1-6)
|
3 (1-6)
|
|
6-11mo
|
1013 (9)
|
7 (3-9)
|
6 (3.5-9)
|
|
1-4y
|
1129 (10)
|
5 (2-8.5)
|
5 (2-8.25)
|
|
5-9y
|
1268 (11)
|
8.5 (6-12)
|
9 (6-13)
|
|
10-14y
|
1266 (11)
|
10 (8-14)
|
10 (7-12)
|
|
15-19y
|
1532 (13)
|
12 (8-16.25)
|
11 (7-14)
|
|
20-29y
|
1292 (11)
|
9 (7-12.25)
|
9 (6-12)
|
|
30-39y
|
1193 (10)
|
8 (6-11)
|
8 (6-12)
|
|
40-59y
|
1804 (15)
|
10 (6-13)
|
8 (6-11)
|
|
60+y
|
832 (7)
|
5.5 (4-10)
|
6 (3-9)
|
|
Do you wear a mask?
|
|
|
|
|
Yes
|
8585 (73)
|
9 (6-12)
|
8 (6-12)
|
|
No
|
3197 (27)
|
5 (3-9)
|
5 (3-9)
|
|
No response
|
0 (0)
|
NA
|
NA
|
|
Household membership
|
|
|
|
|
Member
|
3965 (34)
|
9 (6-12)
|
8 (6-12)
|
|
Non-member
|
7817 (66)
|
3 (1-5)
|
2 (1-5.5)
|
|
Occupation
|
|
|
|
|
Child
|
1466 (14)
|
5 (2-8)
|
5 (2-8)
|
|
Unemployed
|
1569 (16)
|
7 (5-12)
|
7 (4-10)
|
|
Student
|
3290 (33)
|
10 (8-14)
|
9 (7-13)
|
|
Homemaker
|
137 (1)
|
9 (7-9)
|
6 (5-10)
|
|
Casual laboror
|
398 (4)
|
8 (6-11.5)
|
8 (6-10.75)
|
|
Farmer
|
1231 (12)
|
9 (6-12.5)
|
7 (6-10)
|
|
Fisherman
|
73 (1)
|
17.5 (14.25-20.75)
|
19 (14.5-23.5)
|
|
Business person
|
146 (1)
|
10 (7.5-10.5)
|
12 (7-14.5)
|
|
Office worker
|
524 (5)
|
8 (6-10.5)
|
8 (5-11)
|
|
Retired
|
58 (1)
|
5 (4-5)
|
7 (4-8)
|
|
Other
|
1226 (12)
|
10 (7-12.75)
|
9 (7-12.75)
|
|
11
|
NA (NA)
|
6 (3-9)
|
6 (2-9)
|
|
Weekday/Weekend
|
|
|
|
|
Weekday
|
8232 (70)
|
8 (5-12)
|
8 (5-11)
|
|
Weekend
|
3550 (30)
|
7 (3-10)
|
7 (4-10)
|
|
Enrolled in school
|
|
|
|
|
Yes1
|
3603 (32)
|
9 (7-13)
|
9 (6-13)
|
|
No1
|
7686 (68)
|
7 (4-10)
|
7 (3.5-10)
|
|
111
|
NA (NA)
|
8.5 (8-13.5)
|
10 (6.25-14)
|
|
Did you touch?
|
|
|
|
|
Yes2
|
7493 (64)
|
8 (4-11)
|
8 (4-11)
|
|
No2
|
4275 (36)
|
9 (5-11)
|
7 (5-10)
|
|
I don’t remember
|
12 (0)
|
14 (13-15)
|
5 (5-5)
|
|
112
|
NA (NA)
|
8 (8-8)
|
NA
|
|
Contact location
|
|
|
|
|
Indoors
|
734 (6)
|
9.5 (7-10)
|
10 (7.5-16)
|
|
Outdoors
|
5790 (49)
|
7 (4-11)
|
6 (2-10)
|
|
Both
|
5257 (45)
|
8 (5-11)
|
8 (5-11)
|
|
Contact at home
|
|
|
|
|
Yes3
|
7297 (62)
|
8 (5-12)
|
8 (5-11)
|
|
No3
|
4485 (38)
|
6 (3-8)
|
6 (4-9.5)
|
|
Contact at work
|
|
|
|
|
Yes4
|
524 (4)
|
10.5 (7.75-14.75)
|
8 (7-12)
|
|
No4
|
11258 (96)
|
8 (5-11)
|
7 (4-11)
|
|
Contact at school
|
|
|
|
|
Yes5
|
389 (3)
|
8 (5-9.5)
|
10 (10-13)
|
|
No5
|
11393 (97)
|
8 (5-11)
|
7 (4-11)
|
|
Contact in other locations
|
|
|
|
|
Yes6
|
5774 (49)
|
8 (3-11)
|
7 (3-10)
|
|
No6
|
6008 (51)
|
8 (5-12)
|
8 (5-12)
|
|
Frequency of contact
|
|
|
|
|
Never met before
|
399 (3)
|
8.5 (7-10.75)
|
5.5 (4.75-7)
|
|
Rarely
|
452 (4)
|
6 (4.25-10)
|
4.5 (4-8.75)
|
|
Daily or almost daily
|
8953 (76)
|
8 (5-11)
|
8 (5-11)
|
|
1-3 times per week
|
1429 (12)
|
4 (4-7)
|
9 (2.5-11.5)
|
|
Once every 2 weeks
|
319 (3)
|
1 (1-1)
|
4 (2.5-5.5)
|
|
Once per month
|
181 (2)
|
7 (7-7)
|
7 (6-8)
|
|
Once every 3 months
|
47 (0)
|
4.5 (2.75-6.25)
|
2.5 (2.25-2.75)
|
|
Do you know the contact?
|
|
|
|
|
Never met before1
|
326 (8)
|
7 (7-7)
|
5 (4-10)
|
|
<1 yr
|
736 (17)
|
7.5 (4.25-11)
|
7 (4-9)
|
|
1-2 yrs
|
422 (10)
|
8 (6-14)
|
9 (6-15)
|
|
3-5 yrs
|
732 (17)
|
9 (7-11)
|
8 (6.5-10)
|
|
6-10 yrs
|
769 (18)
|
8 (6-12)
|
8 (5-12)
|
|
>10 yrs
|
1303 (30)
|
9 (7-13)
|
9 (6-12)
|
|
113
|
NA (NA)
|
3 (1-5)
|
2 (1-5)
|
|
Contact wearing mask
|
|
|
|
|
Yes7
|
1930 (16)
|
9 (6-13)
|
8 (5-13)
|
|
No7
|
9796 (83)
|
8 (5-11)
|
7 (4-11)
|
|
Can’t recall
|
56 (0)
|
10 (10-10)
|
7.5 (6.25-8.75)
|
|
ARI symptoms
|
|
|
|
|
No symptom
|
9583 (81)
|
8 (5-11)
|
7 (5-10)
|
|
>1 symptom
|
2199 (19)
|
8 (4-12.5)
|
8 (3-12.5)
|
|
AGE symptoms
|
|
|
|
|
Yes8
|
311 (3)
|
9 (8-13)
|
9 (8-16)
|
|
No8
|
11471 (97)
|
8 (5-11)
|
7 (4-11)
|
# urban contacts
df_contact_urban <- df_contact %>%
dplyr::filter(study_site == "Urban")
list <- list(0)
for (i in 1:nrow(variables)){
x <- df_contact_urban[,variables$var[[i]]]
# include another variable n as the number of participants per strata
variables$var[[i]]
# Number and proportion of contacts in each strata
t0 <- as.data.frame(cbind(table(x), # Total
round(prop.table(table(x))*100, digits=0) # Proportion
)) # number of participants
colnames(t0)[1:2] <- c("Total","Col")
Tot <- rep("",5)
# Median contacts for day 1
t1 <- df_contact_d1 %>%
dplyr::filter(study_site == "Urban") %>%
# since this is total contacts per person, keep only 1 distinct record
distinct(rec_id, .keep_all = TRUE) %>%
dplyr::group_by(.dots = variables$var[[i]]) %>%
# na.omit() %>%
do(data.frame(t(quantile(.$num_contacts, na.rm=TRUE, probs=c(0.25,0.5,0.75))))) # Median and IQR
t1$med_contact <- as.character(paste(t1$X50.," (",t1$X25.,"-",t1$X75.,")",sep="")) # Formatting for export
# Median contacts for day 2
t2 <- df_contact_d2 %>%
dplyr::filter(study_site == "Urban") %>%
# since this is total contacts per person, keep only 1 distinct record
distinct(rec_id, .keep_all = TRUE) %>%
dplyr::group_by(.dots = variables$var[[i]]) %>%
# na.omit() %>%
do(data.frame(t(quantile(.$num_contacts, na.rm=TRUE, probs=c(0.25,0.5,0.75))))) # Median and IQR
t2$med_contact <- as.character(paste(t2$X50.," (",t2$X25.,"-",t2$X75.,")",sep="")) # Formatting for export
# Bind columns together and select relevant columns
t0<-qpcR:::cbind.na(t0, t1$med_contact, t2$med_contact)
# convert to numerical and round off?
t0<- t0[,c(1,2,3,4)]
# rename total column
t0$Total <- paste(t0$Total," (","", t0$Col,")",sep="")
t0 <- t0[,c(1,3,4)]
t0[,1]<-as.character(t0[,1])
t0[,2]<-as.character(t0[,2])
t0<-rbind(Tot, t0)
rownames(t0)[1]<-variables$name[[i]]
list[[i]] <- t0
}
## Warning in rbind(deparse.level, ...): number of columns of result, 3, is not a
## multiple of vector length 5 of arg 1
## Warning in rbind(deparse.level, ...): number of columns of result, 3, is not a
## multiple of vector length 5 of arg 1
## Warning in rbind(deparse.level, ...): number of columns of result, 3, is not a
## multiple of vector length 5 of arg 1
## Warning in rbind(deparse.level, ...): number of columns of result, 3, is not a
## multiple of vector length 5 of arg 1
## Warning in rbind(deparse.level, ...): number of columns of result, 3, is not a
## multiple of vector length 5 of arg 1
## Warning in rbind(deparse.level, ...): number of columns of result, 3, is not a
## multiple of vector length 5 of arg 1
## Warning in rbind(deparse.level, ...): number of columns of result, 3, is not a
## multiple of vector length 5 of arg 1
## Warning in rbind(deparse.level, ...): number of columns of result, 3, is not a
## multiple of vector length 5 of arg 1
## Warning in rbind(deparse.level, ...): number of columns of result, 3, is not a
## multiple of vector length 5 of arg 1
## Warning in rbind(deparse.level, ...): number of columns of result, 3, is not a
## multiple of vector length 5 of arg 1
## Warning in rbind(deparse.level, ...): number of columns of result, 3, is not a
## multiple of vector length 5 of arg 1
## Warning in rbind(deparse.level, ...): number of columns of result, 3, is not a
## multiple of vector length 5 of arg 1
## Warning in rbind(deparse.level, ...): number of columns of result, 3, is not a
## multiple of vector length 5 of arg 1
## Warning in rbind(deparse.level, ...): number of columns of result, 3, is not a
## multiple of vector length 5 of arg 1
## Warning in rbind(deparse.level, ...): number of columns of result, 3, is not a
## multiple of vector length 5 of arg 1
## Warning in rbind(deparse.level, ...): number of columns of result, 3, is not a
## multiple of vector length 5 of arg 1
## Warning in rbind(deparse.level, ...): number of columns of result, 3, is not a
## multiple of vector length 5 of arg 1
## Warning in rbind(deparse.level, ...): number of columns of result, 3, is not a
## multiple of vector length 5 of arg 1
# format table
res_restruct<- function(res){
res1 <- lapply(res, as.data.frame)
res1 <- do.call(rbind, res1)
return(res1)
}
table3 <- res_restruct(list)
colnames(table3) <- c("Total (%)", "Day 1", "Day 2")
table3 <- knitr::kable(table3, digits = 0, align = "r") %>%
kableExtra::kable_styling(bootstrap_options = c("striped", "hover", "condensed"))
table3
|
|
Total (%)
|
Day 1
|
Day 2
|
|
Sex
|
|
|
|
|
Female
|
3827 (47)
|
6 (3-8)
|
5 (3-8)
|
|
Male
|
4310 (53)
|
6 (4-9)
|
6 (4-8)
|
|
11
|
NA (NA)
|
5 (5-5)
|
5 (5-5)
|
|
Age
|
|
|
|
|
<6mo
|
477 (6)
|
4 (2-5)
|
3 (2.25-5)
|
|
6-11mo
|
467 (6)
|
4 (2-7)
|
3.5 (2-6)
|
|
1-4y
|
611 (7)
|
4 (2-6.75)
|
3 (2-7)
|
|
5-9y
|
779 (10)
|
6 (4-9)
|
6 (4-8.75)
|
|
10-14y
|
1055 (13)
|
8 (6-10)
|
7.5 (5.75-9)
|
|
15-19y
|
1042 (13)
|
9 (6-12)
|
8 (5-10)
|
|
20-29y
|
743 (9)
|
6 (4-7.25)
|
6 (4.75-8)
|
|
30-39y
|
805 (10)
|
6 (4-9)
|
6 (5-8)
|
|
40-59y
|
1497 (18)
|
6 (4-8)
|
5 (4-7)
|
|
60+y
|
671 (8)
|
5 (3-7)
|
5 (3-8)
|
|
Do you wear a mask?
|
|
|
|
|
Yes
|
6677 (82)
|
6 (4-9)
|
6 (4-9)
|
|
No
|
1462 (18)
|
4 (2-7)
|
4 (2-6)
|
|
No response
|
8 (0)
|
4 (4-4)
|
4 (4-4)
|
|
Household membership
|
|
|
|
|
Member
|
3724 (46)
|
6 (4-9)
|
6 (4-8)
|
|
Non-member
|
4423 (54)
|
2 (1-5)
|
2 (1-5)
|
|
Occupation
|
|
|
|
|
Child
|
944 (13)
|
4 (2-6)
|
3 (2-5)
|
|
Unemployed
|
833 (11)
|
6 (4-9)
|
6 (3-9)
|
|
Student
|
2654 (36)
|
7 (5-10)
|
7 (5-9)
|
|
Homemaker
|
305 (4)
|
6 (4.75-8)
|
6 (5-7.25)
|
|
Casual laboror
|
660 (9)
|
5 (4-7)
|
5 (4-7.25)
|
|
Farmer
|
52 (1)
|
3.5 (3-4)
|
3.5 (1.5-8.5)
|
|
Fisherman
|
0 (0)
|
6 (4-8)
|
6 (4-8)
|
|
Business person
|
727 (10)
|
6 (4-8)
|
6 (4-8)
|
|
Office worker
|
737 (10)
|
4 (3-5.5)
|
5 (2.5-6)
|
|
Retired
|
150 (2)
|
9 (8.25-10)
|
9 (6-10)
|
|
Other
|
229 (3)
|
5 (2-7)
|
4 (2-7)
|
|
Weekday/Weekend
|
|
|
|
|
Weekday
|
5980 (74)
|
6 (4-8)
|
6 (3-8)
|
|
Weekend
|
2138 (26)
|
5 (3-8)
|
5 (3-7)
|
|
111
|
NA (NA)
|
12.5 (9.25-15.75)
|
9.5 (7.25-11.75)
|
|
Enrolled in school
|
|
|
|
|
Yes1
|
2926 (37)
|
7 (5-9)
|
7 (5-9)
|
|
No1
|
4960 (63)
|
5 (3-8)
|
5 (3-7.25)
|
|
112
|
NA (NA)
|
6 (4-6)
|
5 (3-9)
|
|
Did you touch?
|
|
|
|
|
Yes2
|
5930 (73)
|
5 (3-8)
|
5 (3-8)
|
|
No2
|
2198 (27)
|
7 (5-8)
|
6 (4.75-9)
|
|
I don’t remember
|
18 (0)
|
7 (6.5-7.5)
|
3 (2.5-3.5)
|
|
Contact location
|
|
|
|
|
Indoors
|
926 (11)
|
7 (4-9)
|
6 (4-8)
|
|
Outdoors
|
2485 (31)
|
5 (3-8)
|
4 (2.25-7.75)
|
|
Both
|
4736 (58)
|
6 (3.75-8)
|
5 (3-8)
|
|
Contact at home
|
|
|
|
|
Yes3
|
5621 (69)
|
6 (4-8)
|
5 (3-8)
|
|
No3
|
2526 (31)
|
5 (3-7.5)
|
5 (3-7.25)
|
|
Contact at work
|
|
|
|
|
Yes4
|
357 (4)
|
9 (9-9)
|
4 (3-6.75)
|
|
No4
|
7790 (96)
|
6 (4-8)
|
5 (3-8)
|
|
Contact at school
|
|
|
|
|
Yes5
|
260 (3)
|
4.5 (2-8.75)
|
4 (3-4)
|
|
No5
|
7887 (97)
|
6 (4-8)
|
5 (3-8)
|
|
Contact in other locations
|
|
|
|
|
Yes6
|
3051 (37)
|
6 (4-8.5)
|
5 (3-8)
|
|
No6
|
5096 (63)
|
6 (4-8)
|
5.5 (3-8)
|
|
Frequency of contact
|
|
|
|
|
Never met before
|
276 (3)
|
5 (1-6)
|
7 (5-8)
|
|
Rarely
|
302 (4)
|
5 (4-9)
|
4 (3.5-5.75)
|
|
Daily or almost daily
|
6413 (79)
|
6 (4-8)
|
5 (3-8)
|
|
1-3 times per week
|
930 (11)
|
4 (2.5-6.5)
|
4 (3-7)
|
|
Once every 2 weeks
|
149 (2)
|
6 (4-6)
|
6 (2.25-7.5)
|
|
Once per month
|
58 (1)
|
4.5 (3.75-5.25)
|
4.5 (3.75-5.25)
|
|
Once every 3 months
|
18 (0)
|
NA
|
3 (3-3)
|
|
Do you know the contact?
|
|
|
|
|
Never met before1
|
272 (7)
|
5 (1-6)
|
5 (5-7)
|
|
<1 yr
|
587 (15)
|
4 (3-6.75)
|
4 (3-6)
|
|
1-2 yrs
|
304 (8)
|
6 (4-9)
|
6 (4-9)
|
|
3-5 yrs
|
359 (9)
|
4 (3-6)
|
5 (3-8)
|
|
6-10 yrs
|
699 (17)
|
6 (5-9.25)
|
6 (4-8)
|
|
>10 yrs
|
1775 (44)
|
7 (4-9)
|
6 (5-9)
|
|
113
|
NA (NA)
|
2 (1-5)
|
2 (1-4)
|
|
Contact wearing mask
|
|
|
|
|
Yes7
|
2170 (27)
|
6 (4-8)
|
5 (4-7)
|
|
No7
|
5958 (73)
|
6 (4-9)
|
6 (3-8)
|
|
Can’t recall
|
19 (0)
|
5 (5-5)
|
8 (7.5-8.5)
|
|
ARI symptoms
|
|
|
|
|
No symptom
|
6685 (82)
|
6 (3-8)
|
5 (3-8)
|
|
>1 symptom
|
1462 (18)
|
6 (4-8)
|
6 (4-8)
|
|
AGE symptoms
|
|
|
|
|
Yes8
|
124 (2)
|
6 (4-8)
|
6 (5-6.75)
|
|
No8
|
8023 (98)
|
6 (4-8)
|
5 (3-8)
|
contacts_age <- df_contact_d1 %>%
dplyr::group_by(rec_id, study_site, participant_age) %>%
# group by id and count number of df_contact_d1
dplyr::summarize(num_contacts = n())
## `summarise()` has grouped output by 'rec_id', 'study_site'. You can override
## using the `.groups` argument.
# contacts by site
contacts_site_d1 <- df_contact_d1 %>%
dplyr::group_by(rec_id, study_site) %>%
dplyr::summarize(num_contacts = n())
## `summarise()` has grouped output by 'rec_id'. You can override using the
## `.groups` argument.
median_contacts_site_d1 <- contacts_site_d1 %>%
dplyr::group_by(study_site) %>%
dplyr::summarize(q25 = quantile(num_contacts, probs = 0.25),
q50 = quantile(num_contacts, probs = 0.5),
q75 = quantile(num_contacts, probs = 0.75))
mean_contacts_site_d1 <- contacts_site_d1 %>%
dplyr::group_by(study_site) %>%
dplyr::summarize(mean = round(mean(num_contacts),1))
ci_contacts_rural_d1 <- contacts_site_d1 %>%
dplyr::filter(study_site == "Rural")
ci_contacts_rural_d1 <- lm(num_contacts ~ 1, ci_contacts_rural_d1)
ci_contacts_rural_d1 <- as.data.frame(round(confint(ci_contacts_rural_d1),1))
ci_contacts_urban_d1 <- contacts_site_d1 %>%
dplyr::filter(study_site == "Urban")
ci_contacts_urban_d1 <- lm(num_contacts ~ 1, ci_contacts_urban_d1)
ci_contacts_urban_d1 <- as.data.frame(round(confint(ci_contacts_urban_d1),1))
# title
title_rural_d1 <- list(
text = "Rural",
size=14,
# font = f,
xref = "paper", yref = "paper",
yanchor = "bottom", xanchor = "center",
align = "center",
x = 0.5, y = 1,
showarrow = FALSE
)
title_urban_d1 <- list(
text = "Urban",
size=14,
# font = f,
xref = "paper", yref = "paper",
yanchor = "bottom", xanchor = "center",
align = "center",
x = 0.5, y = 1,
showarrow = FALSE
)
hist_rural_d1 <- contacts_site_d1 %>%
dplyr::filter(study_site == "Rural") %>%
plotly::plot_ly(x=~num_contacts, colors="#a6611a") %>%
plotly::add_lines(y = range(0:100),
x = mean_contacts_site_d1$mean[1],
line = list(color = "black"),
showlegend = FALSE) %>%
add_lines(y = range(0:100),
x = median_contacts_site_d1$q50[1],
line = list(color = "grey"),
showlegend = FALSE) %>%
plotly::add_annotations(x = mean_contacts_site_d1$mean[1],
y = 90,
text = paste(mean_contacts_site_d1$mean[1], " (95% CI ",
ci_contacts_rural_d1$`2.5 %`,"-",
ci_contacts_rural_d1$`97.5 %`,")", sep=""),
showarrow = F, xanchor = "left") %>%
add_annotations(x = median_contacts_site_d1$q50[1],
y = 90,
text = paste(median_contacts_site_d1$q50[1], " (95% CI ",
median_contacts_site_d1$q25[1],"-",
median_contacts_site_d1$q75[1],")", sep=""),
showarrow = F, xanchor = "right") %>%
plotly::layout(annotations = title_rural_d1,
xaxis = list(title = "Num of contacts")) %>%
plotly::add_histogram(name="Rural")
# hist_rural
hist_urban_d1 <- contacts_site_d1 %>%
dplyr::filter(study_site == "Urban") %>%
plotly::plot_ly(x=~num_contacts, colors="#018571") %>%
add_lines(y = range(0:100),
x = mean_contacts_site_d1$mean[2],
line = list(color = "black"),
showlegend = FALSE) %>%
add_lines(y = range(0:100),
x = median_contacts_site_d1$q50[2],
line = list(color = "grey"),
showlegend = FALSE) %>%
add_annotations(x = mean_contacts_site_d1$mean[2],
y = 90,
text = paste(mean_contacts_site_d1$mean[2], " (95% CI ",
ci_contacts_urban_d1$`2.5 %`,"-",
ci_contacts_urban_d1$`97.5 %`,")", sep=""),
showarrow = F, xanchor = "left") %>%
add_annotations(x = median_contacts_site_d1$q50[2],
y = 90,
text = paste(median_contacts_site_d1$q50[2], " (95% CI ",
median_contacts_site_d1$q25[2],"-",
median_contacts_site_d1$q75[2],")", sep=""),
showarrow = F, xanchor = "right") %>%
layout(annotations = title_urban_d1,
xaxis = list(title = "Num of contacts")) %>%
plotly::add_histogram(name="Urban")
fig1_hist_d1 <- subplot(hist_rural_d1, hist_urban_d1,
nrows=2, shareX = TRUE) %>%
layout(title = 'Day 1 Contact Distributions')
fig1_hist_d1
# urban contacts
df_contact_urban <- df_contact %>%
dplyr::filter(study_site == "Urban")
contacts_age <- df_contact_d2 %>%
dplyr::group_by(rec_id, study_site, participant_age) %>%
# group by id and count number of df_contact_d2
dplyr::summarize(num_contacts = n())
## `summarise()` has grouped output by 'rec_id', 'study_site'. You can override
## using the `.groups` argument.
# contacts by site
contacts_site_d2 <- df_contact_d2 %>%
dplyr::group_by(rec_id, study_site) %>%
dplyr::summarize(num_contacts = n())
## `summarise()` has grouped output by 'rec_id'. You can override using the
## `.groups` argument.
median_contacts_site_d2 <- contacts_site_d2 %>%
dplyr::group_by(study_site) %>%
dplyr::summarize(q25 = quantile(num_contacts, probs = 0.25),
q50 = quantile(num_contacts, probs = 0.5),
q75 = quantile(num_contacts, probs = 0.75))
mean_contacts_site_d2 <- contacts_site_d2 %>%
dplyr::group_by(study_site) %>%
dplyr::summarize(mean = round(mean(num_contacts),1))
ci_contacts_rural_d2 <- contacts_site_d2 %>%
dplyr::filter(study_site == "Rural")
ci_contacts_rural_d2 <- lm(num_contacts ~ 1, ci_contacts_rural_d2)
ci_contacts_rural_d2 <- as.data.frame(round(confint(ci_contacts_rural_d2),1))
ci_contacts_urban_d2 <- contacts_site_d2 %>%
dplyr::filter(study_site == "Urban")
ci_contacts_urban_d2 <- lm(num_contacts ~ 1, ci_contacts_urban_d2)
ci_contacts_urban_d2 <- as.data.frame(round(confint(ci_contacts_urban_d2),1))
# title
title_rural_d2 <- list(
text = "Rural",
size=14,
# font = f,
xref = "paper", yref = "paper",
yanchor = "bottom", xanchor = "center",
align = "center",
x = 0.5, y = 1,
showarrow = FALSE
)
title_urban_d2 <- list(
text = "Urban",
size=14,
# font = f,
xref = "paper", yref = "paper",
yanchor = "bottom", xanchor = "center",
align = "center",
x = 0.5, y = 1,
showarrow = FALSE
)
hist_rural_d2 <- contacts_site_d2 %>%
dplyr::filter(study_site == "Rural") %>%
plotly::plot_ly(x=~num_contacts, colors="#a6611a") %>%
plotly::add_lines(y = range(0:100),
x = mean_contacts_site_d2$mean[1],
line = list(color = "black"),
showlegend = FALSE) %>%
add_lines(y = range(0:100),
x = median_contacts_site_d2$q50[1],
line = list(color = "grey"),
showlegend = FALSE) %>%
plotly::add_annotations(x = mean_contacts_site_d2$mean[1],
y = 90,
text = paste(mean_contacts_site_d2$mean[1], " (95% CI ",
ci_contacts_rural_d2$`2.5 %`,"-",
ci_contacts_rural_d2$`97.5 %`,")", sep=""),
showarrow = F, xanchor = "left") %>%
add_annotations(x = median_contacts_site_d2$q50[1],
y = 90,
text = paste(median_contacts_site_d2$q50[1], " (95% CI ",
median_contacts_site_d2$q25[1],"-",
median_contacts_site_d2$q75[1],")", sep=""),
showarrow = F, xanchor = "right") %>%
plotly::layout(annotations = title_rural_d2,
xaxis = list(title = "Num of contacts")) %>%
plotly::add_histogram(name="Rural")
# hist_rural
hist_urban_d2 <- contacts_site_d2 %>%
dplyr::filter(study_site == "Urban") %>%
plotly::plot_ly(x=~num_contacts, colors="#018571") %>%
add_lines(y = range(0:100),
x = mean_contacts_site_d2$mean[2],
line = list(color = "black"),
showlegend = FALSE) %>%
add_lines(y = range(0:100),
x = median_contacts_site_d2$q50[2],
line = list(color = "grey"),
showlegend = FALSE) %>%
add_annotations(x = mean_contacts_site_d2$mean[2],
y = 90,
text = paste(mean_contacts_site_d2$mean[2], " (95% CI ",
ci_contacts_urban_d2$`2.5 %`,"-",
ci_contacts_urban_d2$`97.5 %`,")", sep=""),
showarrow = F, xanchor = "left") %>%
add_annotations(x = median_contacts_site_d2$q50[2],
y = 90,
text = paste(median_contacts_site_d2$q50[2], " (95% CI ",
median_contacts_site_d2$q25[2],"-",
median_contacts_site_d2$q75[2],")", sep=""),
showarrow = F, xanchor = "right") %>%
layout(annotations = title_urban_d2,
xaxis = list(title = "Num of contacts")) %>%
plotly::add_histogram(name="Urban")
fig1_hist_d2 <- subplot(hist_rural_d2, hist_urban_d2,
nrows=2, shareX = TRUE) %>%
layout(title = 'Day 2 Contact Distributions')
fig1_hist_d2
# urban contacts
df_contact_urban <- df_contact %>%
dplyr::filter(study_site == "Urban")
# contacts by site
contacts_site <- df_contact %>%
dplyr::group_by(rec_id, study_site, study_day, age, participant_sex) %>%
dplyr::summarize(num_contacts = n())
## `summarise()` has grouped output by 'rec_id', 'study_site', 'study_day', 'age'.
## You can override using the `.groups` argument.
median_contacts_site <- contacts_site %>%
dplyr::group_by(study_site) %>%
dplyr::summarize(q25 = quantile(num_contacts, probs = 0.25),
q50 = quantile(num_contacts, probs = 0.5),
q75 = quantile(num_contacts, probs = 0.75))
mean_contacts_site <- contacts_site %>%
dplyr::group_by(study_site) %>%
dplyr::summarize(mean = round(mean(num_contacts),1))
ci_contacts_rural <- contacts_site %>%
dplyr::filter(study_site == "Rural")
ci_contacts_rural <- lm(num_contacts ~ 1, ci_contacts_rural)
ci_contacts_rural <- as.data.frame(round(confint(ci_contacts_rural),1))
ci_contacts_urban <- contacts_site %>%
dplyr::filter(study_site == "Urban")
ci_contacts_urban <- lm(num_contacts ~ 1, ci_contacts_urban)
ci_contacts_urban <- as.data.frame(round(confint(ci_contacts_urban),1))
# title
title_rural <- list(
text = "Rural",
size=14,
# font = f,
xref = "paper", yref = "paper",
yanchor = "bottom", xanchor = "center",
align = "center",
x = 0.5, y = 1,
showarrow = FALSE
)
title_urban <- list(
text = "Urban",
size=14,
# font = f,
xref = "paper", yref = "paper",
yanchor = "bottom", xanchor = "center",
align = "center",
x = 0.5, y = 1,
showarrow = FALSE
)
hist_rural <- contacts_site %>%
dplyr::filter(study_site == "Rural") %>%
plotly::plot_ly(x=~num_contacts, colors="#a6611a") %>%
plotly::add_lines(y = range(0:150),
x = mean_contacts_site$mean[1],
line = list(color = "black"),
showlegend = FALSE) %>%
add_lines(y = range(0:150),
x = median_contacts_site$q50[1],
line = list(color = "grey"),
showlegend = FALSE) %>%
plotly::add_annotations(x = mean_contacts_site$mean[1],
y = 90,
text = paste(mean_contacts_site$mean[1], " (95% CI ",
ci_contacts_rural$`2.5 %`,"-",
ci_contacts_rural$`97.5 %`,")", sep=""),
showarrow = F, xanchor = "left") %>%
add_annotations(x = median_contacts_site$q50[1],
y = 90,
text = paste(median_contacts_site$q50[1], " (95% CI ",
median_contacts_site$q25[1],"-",
median_contacts_site$q75[1],")", sep=""),
showarrow = F, xanchor = "right") %>%
plotly::layout(annotations = title_rural,
xaxis = list(title = "Num of contacts")) %>%
plotly::add_histogram(name="Rural")
# hist_rural
hist_urban <- contacts_site %>%
dplyr::filter(study_site == "Urban") %>%
plotly::plot_ly(x=~num_contacts, colors="#018571") %>%
add_lines(y = range(0:150),
x = mean_contacts_site$mean[2],
line = list(color = "black"),
showlegend = FALSE) %>%
add_lines(y = range(0:150),
x = median_contacts_site$q50[2],
line = list(color = "grey"),
showlegend = FALSE) %>%
add_annotations(x = mean_contacts_site$mean[2],
y = 90,
text = paste(mean_contacts_site$mean[2], " (95% CI ",
ci_contacts_urban$`2.5 %`,"-",
ci_contacts_urban$`97.5 %`,")", sep=""),
showarrow = F, xanchor = "left") %>%
add_annotations(x = median_contacts_site$q50[2],
y = 90,
text = paste(median_contacts_site$q50[2], " (95% CI ",
median_contacts_site$q25[2],"-",
median_contacts_site$q75[2],")", sep=""),
showarrow = F, xanchor = "right") %>%
layout(annotations = title_urban,
xaxis = list(title = "Num of contacts")) %>%
plotly::add_histogram(name="Urban")
fig1_hist <- subplot(hist_rural, hist_urban,
nrows=2, shareX = TRUE) %>%
layout(title = 'Contact Distributions - Both Days')
fig1_hist
Number of Contacts Analysis
Questions: - Should we have separate urban and rural models? - Model
should include random effects (lme4) for - Negative binomial mixed model
not converging at all (no interaction) - Poisson MM is working fine with
no interaction - Poisson MM NOT working when including interaction terms
- Linear MM works fine with and without interaction
library(lme4)
## Loading required package: Matrix
require(MASS)
## Loading required package: MASS
##
## Attaching package: 'MASS'
## The following object is masked from 'package:plotly':
##
## select
## The following object is masked from 'package:dplyr':
##
## select
library(lmerTest)
##
## Attaching package: 'lmerTest'
## The following object is masked from 'package:lme4':
##
## lmer
## The following object is masked from 'package:stats':
##
## step
# negbin_model <- glmer.nb(num_contacts ~ s_age + participant_sex +
# study_site + (1 | rec_id),
# offset = log(N),
# verbose = TRUE,
# data = df_contact %>% distinct())
# summary(negbin_model)
# nb_int_model <- glmer.nb(num_contacts ~ s_age + participant_sex +
# study_site + participant_sex*study_site +
# s_age*study_site + s_age*participant_sex+
# (1 | rec_id),
# offset = log(N),
# verbose = TRUE,
# data = df_contact %>% distinct())
# summary(nb_int_model)
df_contact$N <- sum(df_contact$num_contacts)
df_contact$s_age <- scale(as.numeric(df_contact$participant_age))
poisson_model <- glmer(num_contacts ~ s_age + participant_sex +
study_site + (1 | rec_id),
offset = log(N),
data = df_contact %>% distinct(),
family = poisson(link = "log"))
summary(poisson_model)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: poisson ( log )
## Formula: num_contacts ~ s_age + participant_sex + study_site + (1 | rec_id)
## Data: df_contact %>% distinct()
## Offset: log(N)
##
## AIC BIC logLik deviance df.resid
## 87517.3 87556.4 -43753.6 87507.3 18454
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.8453 -0.2591 0.0239 0.3518 1.3139
##
## Random effects:
## Groups Name Variance Std.Dev.
## rec_id (Intercept) 0.312 0.5586
## Number of obs: 18459, groups: rec_id, 1363
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -10.24995 0.02741 -373.887 <2e-16 ***
## s_age 0.13898 0.01455 9.552 <2e-16 ***
## participant_sexMale 0.06213 0.03154 1.970 0.0489 *
## study_siteUrban -0.30971 0.03159 -9.804 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) s_age prtc_M
## s_age 0.069
## prtcpnt_sxM -0.590 0.009
## stdy_stUrbn -0.560 -0.055 0.000
poi_int_model <- glmer(num_contacts ~ s_age + participant_sex +
study_site + participant_sex*study_site +
s_age*study_site + s_age*participant_sex +
(1 | rec_id),
offset = log(N),
data = df_contact %>% distinct(),
family = poisson(link = "log"))
summary(poi_int_model)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: poisson ( log )
## Formula:
## num_contacts ~ s_age + participant_sex + study_site + participant_sex *
## study_site + s_age * study_site + s_age * participant_sex +
## (1 | rec_id)
## Data: df_contact %>% distinct()
## Offset: log(N)
##
## AIC BIC logLik deviance df.resid
## 87518.1 87580.7 -43751.1 87502.1 18451
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.8457 -0.2590 0.0246 0.3502 1.3146
##
## Random effects:
## Groups Name Variance Std.Dev.
## rec_id (Intercept) 0.3111 0.5577
## Number of obs: 18459, groups: rec_id, 1363
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -10.233229 0.031520 -324.659 < 2e-16 ***
## s_age 0.170134 0.024894 6.834 8.23e-12 ***
## participant_sexMale 0.038088 0.044083 0.864 0.3876
## study_siteUrban -0.343044 0.045185 -7.592 3.15e-14 ***
## participant_sexMale:study_siteUrban 0.055007 0.063171 0.871 0.3839
## s_age:study_siteUrban -0.060714 0.029054 -2.090 0.0366 *
## s_age:participant_sexMale -0.003907 0.029052 -0.134 0.8930
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) s_age prtc_M stdy_U p_M:_U s_g:_U
## s_age 0.070
## prtcpnt_sxM -0.710 -0.014
## stdy_stUrbn -0.693 -0.037 0.492
## prtcpn_M:_U 0.494 0.001 -0.696 -0.713
## s_g:stdy_sU -0.039 -0.552 -0.040 0.053 0.008
## s_g:prtcp_M -0.042 -0.575 0.099 0.004 -0.053 -0.037
linear_model <- lmer(num_contacts ~ s_age + participant_sex +
study_site + (1 | rec_id),
offset = log(N),
data = df_contact %>% distinct())
summary(linear_model)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: num_contacts ~ s_age + participant_sex + study_site + (1 | rec_id)
## Data: df_contact %>% distinct()
## Offset: log(N)
##
## REML criterion at convergence: 89179.8
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -11.7125 -0.2591 0.0205 0.4393 2.9561
##
## Random effects:
## Groups Name Variance Std.Dev.
## rec_id (Intercept) 20.334 4.509
## Residual 5.577 2.362
## Number of obs: 18459, groups: rec_id, 1363
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) -3.3173 0.2160 1356.0778 -15.358 < 2e-16 ***
## s_age 0.8792 0.1131 1383.4643 7.777 1.45e-14 ***
## participant_sexMale 0.3411 0.2484 1362.4466 1.373 0.17
## study_siteUrban -2.6351 0.2488 1362.2500 -10.592 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) s_age prtc_M
## s_age 0.091
## prtcpnt_sxM -0.589 0.005
## stdy_stUrbn -0.568 -0.055 0.005
lin_int_model <- lmer(num_contacts ~ s_age + participant_sex +
study_site + participant_sex*study_site +
s_age*study_site + s_age*participant_sex +
(1 | rec_id),
offset = log(N),
data = df_contact %>% distinct())
summary(lin_int_model)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula:
## num_contacts ~ s_age + participant_sex + study_site + participant_sex *
## study_site + s_age * study_site + s_age * participant_sex +
## (1 | rec_id)
## Data: df_contact %>% distinct()
## Offset: log(N)
##
## REML criterion at convergence: 89171.5
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -11.7138 -0.2580 0.0214 0.4374 2.9565
##
## Random effects:
## Groups Name Variance Std.Dev.
## rec_id (Intercept) 20.232 4.498
## Residual 5.577 2.362
## Number of obs: 18459, groups: rec_id, 1363
##
## Fixed effects:
## Estimate Std. Error df t value
## (Intercept) -3.20015 0.24884 1351.41118 -12.860
## s_age 1.20955 0.19383 1379.70405 6.240
## participant_sexMale 0.23656 0.34896 1349.86403 0.678
## study_siteUrban -2.87260 0.35489 1362.69733 -8.094
## participant_sexMale:study_siteUrban 0.30003 0.49702 1360.04831 0.604
## s_age:study_siteUrban -0.70700 0.22596 1381.52334 -3.129
## s_age:participant_sexMale 0.03264 0.22597 1381.59210 0.144
## Pr(>|t|)
## (Intercept) < 2e-16 ***
## s_age 5.80e-10 ***
## participant_sexMale 0.49796
## study_siteUrban 1.26e-15 ***
## participant_sexMale:study_siteUrban 0.54618
## s_age:study_siteUrban 0.00179 **
## s_age:participant_sexMale 0.88516
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) s_age prtc_M stdy_U p_M:_U s_g:_U
## s_age 0.093
## prtcpnt_sxM -0.709 -0.023
## stdy_stUrbn -0.699 -0.045 0.493
## prtcpn_M:_U 0.496 0.001 -0.699 -0.710
## s_g:stdy_sU -0.052 -0.560 -0.041 0.072 0.005
## s_g:prtcp_M -0.053 -0.572 0.116 0.002 -0.054 -0.031
Outlier analysis using mean as cut off
contact_summaries <- df_contact %>%
distinct() %>%
ungroup() %>%
dplyr::summarize(q25 = quantile(num_contacts, probs = 0.25),
q50 = quantile(num_contacts, probs = 0.5),
q75 = quantile(num_contacts, probs = 0.75),
q90 = quantile(num_contacts, probs = 0.90),
mean = mean(num_contacts))
df_contact$mean_outlier <- if_else(df_contact$num_contacts > contact_summaries$mean, 1, 0)
df_contact$q75_outlier <- if_else(df_contact$num_contacts > contact_summaries$q75, 1, 0)
mean_outlier_model <- glmer(mean_outlier ~ s_age + participant_sex +
study_site + (1 | rec_id),
data = df_contact %>% distinct(),
family = binomial(link = "logit"))
summary(mean_outlier_model)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: mean_outlier ~ s_age + participant_sex + study_site + (1 | rec_id)
## Data: df_contact %>% distinct()
##
## AIC BIC logLik deviance df.resid
## 7027.3 7066.5 -3508.7 7017.3 18454
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.04757 -0.00489 -0.00256 0.04458 1.00437
##
## Random effects:
## Groups Name Variance Std.Dev.
## rec_id (Intercept) 308.1 17.55
## Number of obs: 18459, groups: rec_id, 1363
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -10.5386 0.4544 -23.190 < 2e-16 ***
## s_age 0.1226 0.2185 0.561 0.57452
## participant_sexMale 0.2063 0.4826 0.428 0.66898
## study_siteUrban -1.4321 0.5205 -2.751 0.00593 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) s_age prtc_M
## s_age 0.049
## prtcpnt_sxM -0.565 0.038
## stdy_stUrbn -0.375 -0.124 -0.014
mean_out_int_model <- glmer(mean_outlier ~ s_age + participant_sex +
study_site + participant_sex*study_site +
s_age*study_site + s_age*participant_sex +
(1 | rec_id),
data = df_contact %>% distinct(),
family = binomial(link = "logit"))
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 5.90394 (tol = 0.002, component 1)
summary(mean_out_int_model)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula:
## mean_outlier ~ s_age + participant_sex + study_site + participant_sex *
## study_site + s_age * study_site + s_age * participant_sex +
## (1 | rec_id)
## Data: df_contact %>% distinct()
##
## AIC BIC logLik deviance df.resid
## 7140.2 7202.8 -3562.1 7124.2 18451
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.99574 -0.01248 -0.00599 0.07085 1.01126
##
## Random effects:
## Groups Name Variance Std.Dev.
## rec_id (Intercept) 101.3 10.07
## Number of obs: 18459, groups: rec_id, 1363
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -8.8257 0.4584 -19.253 < 2e-16 ***
## s_age 0.8870 0.3236 2.741 0.00612 **
## participant_sexMale 0.7274 0.5261 1.383 0.16677
## study_siteUrban -1.2865 0.6079 -2.116 0.03434 *
## participant_sexMale:study_siteUrban -0.5008 0.8245 -0.607 0.54363
## s_age:study_siteUrban -0.7795 0.3692 -2.111 0.03476 *
## s_age:participant_sexMale -0.6835 0.3729 -1.833 0.06681 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) s_age prtc_M stdy_U p_M:_U s_g:_U
## s_age -0.172
## prtcpnt_sxM -0.680 0.159
## stdy_stUrbn -0.622 0.086 0.497
## prtcpn_M:_U 0.455 -0.137 -0.641 -0.747
## s_g:stdy_sU 0.076 -0.466 -0.140 -0.085 0.226
## s_g:prtcp_M 0.158 -0.686 -0.003 -0.051 0.055 0.003
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## Model failed to converge with max|grad| = 5.90394 (tol = 0.002, component 1)
Outlier analysis with 75th %ile as cut off
q75_outlier_model <- glmer(q75_outlier ~ s_age + participant_sex +
study_site + (1 | rec_id),
data = df_contact %>% distinct(),
family = binomial(link = "logit"))
summary(q75_outlier_model)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: q75_outlier ~ s_age + participant_sex + study_site + (1 | rec_id)
## Data: df_contact %>% distinct()
##
## AIC BIC logLik deviance df.resid
## 5878.4 5917.5 -2934.2 5868.4 18454
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.03578 -0.00396 -0.00221 -0.00181 1.04868
##
## Random effects:
## Groups Name Variance Std.Dev.
## rec_id (Intercept) 237.1 15.4
## Number of obs: 18459, groups: rec_id, 1363
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -11.01679 0.50567 -21.787 <2e-16 ***
## s_age 0.13066 0.25864 0.505 0.6134
## participant_sexMale 0.08093 0.55152 0.147 0.8833
## study_siteUrban -1.35364 0.63378 -2.136 0.0327 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) s_age prtc_M
## s_age -0.003
## prtcpnt_sxM -0.570 0.039
## stdy_stUrbn -0.317 -0.079 -0.017
q75_out_int_model <- glmer(q75_outlier ~ s_age + participant_sex +
study_site + participant_sex*study_site +
s_age*study_site + s_age*participant_sex +
(1 | rec_id),
data = df_contact %>% distinct(),
family = binomial(link = "logit"))
summary(q75_out_int_model)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula:
## q75_outlier ~ s_age + participant_sex + study_site + participant_sex *
## study_site + s_age * study_site + s_age * participant_sex +
## (1 | rec_id)
## Data: df_contact %>% distinct()
##
## AIC BIC logLik deviance df.resid
## 5884.3 5946.8 -2934.1 5868.3 18451
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.03562 -0.00393 -0.00213 -0.00194 1.04870
##
## Random effects:
## Groups Name Variance Std.Dev.
## rec_id (Intercept) 236.4 15.37
## Number of obs: 18459, groups: rec_id, 1363
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -11.02212 0.53289 -20.684 <2e-16 ***
## s_age 0.13742 0.39408 0.349 0.727
## participant_sexMale 0.10422 0.63852 0.163 0.870
## study_siteUrban -1.29950 0.92039 -1.412 0.158
## participant_sexMale:study_siteUrban -0.07876 1.26795 -0.062 0.950
## s_age:study_siteUrban -0.24016 0.59698 -0.402 0.687
## s_age:participant_sexMale 0.09842 0.51613 0.191 0.849
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) s_age prtc_M stdy_U p_M:_U s_g:_U
## s_age -0.031
## prtcpnt_sxM -0.626 0.031
## stdy_stUrbn -0.442 0.035 0.365
## prtcpn_M:_U 0.315 -0.019 -0.504 -0.726
## s_g:stdy_sU 0.006 -0.348 -0.011 0.026 -0.036
## s_g:prtcp_M 0.030 -0.655 -0.019 -0.035 0.015 -0.045
Outlier analysis with 90th %ile as cut off
df_contact$q90_outlier <- if_else(df_contact$num_contacts > contact_summaries$q90, 1, 0)
q90_outlier_model <- glmer(q90_outlier ~ s_age + participant_sex +
study_site +(1 | rec_id),
data = df_contact %>% distinct(),
family = binomial(link = "logit"))
summary(q90_outlier_model)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: q90_outlier ~ s_age + participant_sex + study_site + (1 | rec_id)
## Data: df_contact %>% distinct()
##
## AIC BIC logLik deviance df.resid
## 2588.6 2627.7 -1289.3 2578.6 18454
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.03333 -0.00193 -0.00146 -0.00081 1.00288
##
## Random effects:
## Groups Name Variance Std.Dev.
## rec_id (Intercept) 249.1 15.78
## Number of obs: 18459, groups: rec_id, 1363
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -12.47901 0.81198 -15.369 <2e-16 ***
## s_age 0.26310 0.46740 0.563 0.574
## participant_sexMale -0.07088 0.93129 -0.076 0.939
## study_siteUrban -1.66910 1.30149 -1.282 0.200
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) s_age prtc_M
## s_age -0.115
## prtcpnt_sxM -0.581 0.033
## stdy_stUrbn -0.228 -0.044 -0.009
q90_out_int_model <- glmer(q90_outlier ~ s_age + participant_sex +
study_site + participant_sex*study_site +
s_age*study_site + s_age*participant_sex +
(1 | rec_id),
data = df_contact %>% distinct(),
family = binomial(link = "logit"))
summary(q90_out_int_model)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula:
## q90_outlier ~ s_age + participant_sex + study_site + participant_sex *
## study_site + s_age * study_site + s_age * participant_sex +
## (1 | rec_id)
## Data: df_contact %>% distinct()
##
## AIC BIC logLik deviance df.resid
## 2594.4 2657.0 -1289.2 2578.4 18451
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.03323 -0.00192 -0.00135 -0.00080 1.00287
##
## Random effects:
## Groups Name Variance Std.Dev.
## rec_id (Intercept) 248.1 15.75
## Number of obs: 18459, groups: rec_id, 1363
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -12.48407 0.84496 -14.775 <2e-16 ***
## s_age 0.35327 0.68893 0.513 0.608
## participant_sexMale -0.07991 1.03104 -0.078 0.938
## study_siteUrban -1.70445 1.92519 -0.885 0.376
## participant_sexMale:study_siteUrban 0.15749 2.62135 0.060 0.952
## s_age:study_siteUrban -0.55342 1.26942 -0.436 0.663
## s_age:participant_sexMale -0.01178 0.93614 -0.013 0.990
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) s_age prtc_M stdy_U p_M:_U s_g:_U
## s_age -0.235
## prtcpnt_sxM -0.621 0.180
## stdy_stUrbn -0.338 0.137 0.280
## prtcpn_M:_U 0.253 -0.111 -0.404 -0.729
## s_g:stdy_sU 0.062 -0.263 -0.010 0.080 -0.016
## s_g:prtcp_M 0.156 -0.669 -0.211 -0.120 0.144 -0.055